Transpose
对输入数组按照指定维度顺序(perm)进行转置操作,并输出结果数组。
- 输入:
in_data - 输入数据地址。
num_axes - 数据维度数。
output_shape - 输出形状数组。
perm - 转置维度顺序数组。
strides - 输入数据每维步长。
out_strides - 输出数据每维步长。
core_mask - 核掩码(仅适用于共享存储版本)。
- 输出:
out_data - 转置结果地址。
- 支持平台:
FT78NE、MT7004
- 备注:
FT78NE 支持fp, dp, int8, int16, int32, cplx64, cplx128
MT7004 支持hp, fp, int16, int32, cplx64
共享存储版本:
-
void fp_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const float *in_data, float *out_data, int core_mask)
-
void hp_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const half *in_data, half *out_data, int core_mask)
-
void dp_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const double *in_data, double *out_data, int core_mask)
-
void i8_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int8_t *in_data, int8_t *out_data, int core_mask)
-
void i16_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int16_t *in_data, int16_t *out_data, int core_mask)
-
void i32_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int *in_data, int *out_data, int core_mask)
-
void c64_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const float *in_data, float *out_data, int core_mask)
-
void c128_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const double *in_data, double *out_data, int core_mask)
C调用示例:
#include <stdio.h>
#include <transpose.h>

int main() {
    float *input = (float *)0xA0000000; // 输入在DDR空间
    float *output = (float *)0xC0000000;
    int num_axes = 4;
    int output_shape[4] = {1, 3, 224, 224};
    int perm[4] = {0, 2, 3, 1};
    int strides[4] = {150528, 50176, 224, 1};
    int out_strides[4] = {150528, 50176, 224, 1};
    int core_mask = 0xff;

    fp_transpose_s(num_axes, output_shape, perm, strides, out_strides, input, output, core_mask);
    return 0;
}
私有存储版本:
-
void fp_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const float *in_data, float *out_data)
-
void hp_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const half *in_data, half *out_data)
-
void dp_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const double *in_data, double *out_data)
-
void i8_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int8_t *in_data, int8_t *out_data)
-
void i16_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int16_t *in_data, int16_t *out_data)
-
void i32_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int *in_data, int *out_data)
-
void c64_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const float *in_data, float *out_data)
-
void c128_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const double *in_data, double *out_data)
C调用示例:
#include <stdio.h>
#include <transpose.h>

int main() {
    float *input = (float *)0x10810000; // 输入在L2空间
    float *output = (float *)0x10820000;
    int num_axes = 4;
    int output_shape[4] = {1, 3, 224, 224};
    int perm[4] = {0, 2, 3, 1};
    int strides[4] = {150528, 50176, 224, 1};
    int out_strides[4] = {150528, 50176, 224, 1};

    fp_transpose_p(num_axes, output_shape, perm, strides, out_strides, input, output);
    return 0;
}